import statsmodels.api as sm
import pandas as pd
import statsmodels.formula.api as smf
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.linear_model import LinearRegression
import numpy as np
df=pd.read_csv('/Users/maha/Downloads/wages_by_education.csv')
df.head()
| year | less_than_hs | high_school | some_college | bachelors_degree | advanced_degree | men_less_than_hs | men_high_school | men_some_college | men_bachelors_degree | ... | black_women_less_than_hs | black_women_high_school | black_women_some_college | black_women_bachelors_degree | black_women_advanced_degree | hispanic_women_less_than_hs | hispanic_women_high_school | hispanic_women_some_college | hispanic_women_bachelors_degree | hispanic_women_advanced_degree | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 16.52 | 21.94 | 24.81 | 41.60 | 53.22 | 17.99 | 24.08 | 27.96 | 49.01 | ... | 13.89 | 17.83 | 20.36 | 29.94 | 39.41 | 14.74 | 18.18 | 20.64 | 31.13 | 40.64 |
| 1 | 2021 | 16.74 | 22.28 | 24.92 | 41.32 | 53.45 | 18.34 | 24.36 | 27.96 | 47.83 | ... | 13.73 | 18.30 | 20.18 | 29.35 | 40.07 | 14.97 | 18.34 | 21.14 | 31.25 | 42.47 |
| 2 | 2020 | 17.02 | 22.70 | 25.44 | 41.65 | 53.74 | 18.76 | 25.09 | 28.55 | 48.15 | ... | 13.66 | 17.93 | 20.31 | 31.38 | 42.44 | 14.58 | 18.50 | 20.69 | 31.55 | 44.15 |
| 3 | 2019 | 16.11 | 21.64 | 24.00 | 39.61 | 51.57 | 17.55 | 23.99 | 26.99 | 45.74 | ... | 13.30 | 17.18 | 19.17 | 30.31 | 40.42 | 14.50 | 17.71 | 19.69 | 30.18 | 42.30 |
| 4 | 2018 | 15.94 | 21.50 | 23.70 | 38.87 | 51.03 | 17.70 | 23.72 | 26.61 | 44.97 | ... | 12.48 | 16.81 | 18.57 | 30.85 | 39.64 | 13.47 | 17.28 | 19.29 | 29.47 | 39.35 |
5 rows × 61 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50 entries, 0 to 49 Data columns (total 61 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 50 non-null int64 1 less_than_hs 50 non-null float64 2 high_school 50 non-null float64 3 some_college 50 non-null float64 4 bachelors_degree 50 non-null float64 5 advanced_degree 50 non-null float64 6 men_less_than_hs 50 non-null float64 7 men_high_school 50 non-null float64 8 men_some_college 50 non-null float64 9 men_bachelors_degree 50 non-null float64 10 men_advanced_degree 50 non-null float64 11 women_less_than_hs 50 non-null float64 12 women_high_school 50 non-null float64 13 women_some_college 50 non-null float64 14 women_bachelors_degree 50 non-null float64 15 women_advanced_degree 50 non-null float64 16 white_less_than_hs 50 non-null float64 17 white_high_school 50 non-null float64 18 white_some_college 50 non-null float64 19 white_bachelors_degree 50 non-null float64 20 white_advanced_degree 50 non-null float64 21 black_less_than_hs 50 non-null float64 22 black_high_school 50 non-null float64 23 black_some_college 50 non-null float64 24 black_bachelors_degree 50 non-null float64 25 black_advanced_degree 50 non-null float64 26 hispanic_less_than_hs 50 non-null float64 27 hispanic_high_school 50 non-null float64 28 hispanic_some_college 50 non-null float64 29 hispanic_bachelors_degree 50 non-null float64 30 hispanic_advanced_degree 50 non-null float64 31 white_men_less_than_hs 50 non-null float64 32 white_men_high_school 50 non-null float64 33 white_men_some_college 50 non-null float64 34 white_men_bachelors_degree 50 non-null float64 35 white_men_advanced_degree 50 non-null float64 36 black_men_less_than_hs 50 non-null float64 37 black_men_high_school 50 non-null float64 38 black_men_some_college 50 non-null float64 39 black_men_bachelors_degree 50 non-null float64 40 black_men_advanced_degree 50 non-null float64 41 hispanic_men_less_than_hs 50 non-null float64 42 hispanic_men_high_school 50 non-null float64 43 hispanic_men_some_college 50 non-null float64 44 hispanic_men_bachelors_degree 50 non-null float64 45 hispanic_men_advanced_degree 50 non-null float64 46 white_women_less_than_hs 50 non-null float64 47 white_women_high_school 50 non-null float64 48 white_women_some_college 50 non-null float64 49 white_women_bachelors_degree 50 non-null float64 50 white_women_advanced_degree 50 non-null float64 51 black_women_less_than_hs 50 non-null float64 52 black_women_high_school 50 non-null float64 53 black_women_some_college 50 non-null float64 54 black_women_bachelors_degree 50 non-null float64 55 black_women_advanced_degree 50 non-null float64 56 hispanic_women_less_than_hs 50 non-null float64 57 hispanic_women_high_school 50 non-null float64 58 hispanic_women_some_college 50 non-null float64 59 hispanic_women_bachelors_degree 50 non-null float64 60 hispanic_women_advanced_degree 50 non-null float64 dtypes: float64(60), int64(1) memory usage: 24.0 KB
fig = px.line(df, x='year', y=['less_than_hs', 'high_school', 'some_college', 'bachelors_degree', 'advanced_degree'],
labels={'value': 'Wage', 'variable': 'Education Level'},
title='Wages by Education Level in the USA (1973-2022)')
fig.update_layout(
xaxis_title='Year',
yaxis_title='Wage',
legend_title='Education Level',
hovermode='x'
)
fig.show()
df['women_total'] = df[['women_less_than_hs', 'women_high_school', 'women_some_college', 'women_bachelors_degree', 'women_advanced_degree']].sum(axis=1)
df['men_total'] = df[['men_less_than_hs', 'men_high_school', 'men_some_college', 'men_bachelors_degree', 'men_advanced_degree']].sum(axis=1)
sns.scatterplot(x='women_total', y='year', data=df, label='Women')
sns.scatterplot(x='men_total', y='year', data=df, label='Men')
plt.xlabel('Wage')
plt.ylabel('Year')
plt.title('Scatter Plot of Wages by Gender')
plt.legend()
plt.show()
model = smf.ols(formula = "year ~ women_total", data = df).fit()
model.summary()
| Dep. Variable: | year | R-squared: | 0.911 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.909 |
| Method: | Least Squares | F-statistic: | 493.4 |
| Date: | Wed, 06 Dec 2023 | Prob (F-statistic): | 6.66e-27 |
| Time: | 17:34:01 | Log-Likelihood: | -143.84 |
| No. Observations: | 50 | AIC: | 291.7 |
| Df Residuals: | 48 | BIC: | 295.5 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 1852.4782 | 6.558 | 282.474 | 0.000 | 1839.292 | 1865.664 |
| women_total | 1.2473 | 0.056 | 22.213 | 0.000 | 1.134 | 1.360 |
| Omnibus: | 2.036 | Durbin-Watson: | 0.242 |
|---|---|---|---|
| Prob(Omnibus): | 0.361 | Jarque-Bera (JB): | 1.706 |
| Skew: | -0.450 | Prob(JB): | 0.426 |
| Kurtosis: | 2.905 | Cond. No. | 1.24e+03 |
model = smf.ols(formula = "year ~ men_total", data = df).fit()
model.summary()
| Dep. Variable: | year | R-squared: | 0.718 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.712 |
| Method: | Least Squares | F-statistic: | 122.1 |
| Date: | Wed, 06 Dec 2023 | Prob (F-statistic): | 8.70e-15 |
| Time: | 17:34:01 | Log-Likelihood: | -172.78 |
| No. Observations: | 50 | AIC: | 349.6 |
| Df Residuals: | 48 | BIC: | 353.4 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 1809.6309 | 17.037 | 106.219 | 0.000 | 1775.376 | 1843.886 |
| men_total | 1.1955 | 0.108 | 11.051 | 0.000 | 0.978 | 1.413 |
| Omnibus: | 2.704 | Durbin-Watson: | 0.175 |
|---|---|---|---|
| Prob(Omnibus): | 0.259 | Jarque-Bera (JB): | 2.486 |
| Skew: | -0.534 | Prob(JB): | 0.289 |
| Kurtosis: | 2.766 | Cond. No. | 2.43e+03 |
sns.scatterplot(x='high_school', y='year', data=df, label='High School Diploma')
sns.scatterplot(x='advanced_degree', y='year', data=df, label='Advanced Degree')
plt.xlabel('Wage')
plt.ylabel('Year')
plt.title('Scatter Plot of Wages by Gender')
plt.legend()
plt.show()
model = smf.ols(formula = "year ~ high_school", data = df).fit()
model.summary()
| Dep. Variable: | year | R-squared: | 0.029 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.009 |
| Method: | Least Squares | F-statistic: | 1.435 |
| Date: | Wed, 06 Dec 2023 | Prob (F-statistic): | 0.237 |
| Time: | 17:34:02 | Log-Likelihood: | -203.68 |
| No. Observations: | 50 | AIC: | 411.4 |
| Df Residuals: | 48 | BIC: | 415.2 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 1927.6802 | 58.311 | 33.059 | 0.000 | 1810.438 | 2044.922 |
| high_school | 3.3444 | 2.791 | 1.198 | 0.237 | -2.268 | 8.957 |
| Omnibus: | 5.666 | Durbin-Watson: | 0.010 |
|---|---|---|---|
| Prob(Omnibus): | 0.059 | Jarque-Bera (JB): | 2.882 |
| Skew: | -0.339 | Prob(JB): | 0.237 |
| Kurtosis: | 2.039 | Cond. No. | 595. |
model = smf.ols(formula = "year ~ advanced_degree", data = df).fit()
model.summary()
| Dep. Variable: | year | R-squared: | 0.934 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.933 |
| Method: | Least Squares | F-statistic: | 679.2 |
| Date: | Wed, 06 Dec 2023 | Prob (F-statistic): | 5.54e-30 |
| Time: | 17:34:02 | Log-Likelihood: | -136.47 |
| No. Observations: | 50 | AIC: | 276.9 |
| Df Residuals: | 48 | BIC: | 280.8 |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| Intercept | 1880.9385 | 4.505 | 417.564 | 0.000 | 1871.882 | 1889.996 |
| advanced_degree | 2.6552 | 0.102 | 26.061 | 0.000 | 2.450 | 2.860 |
| Omnibus: | 1.376 | Durbin-Watson: | 0.347 |
|---|---|---|---|
| Prob(Omnibus): | 0.503 | Jarque-Bera (JB): | 1.048 |
| Skew: | -0.354 | Prob(JB): | 0.592 |
| Kurtosis: | 2.962 | Cond. No. | 372. |
selected_year = [2020,2021,2022]
filtered_df = df[df['year'].isin(selected_year)]
filtered_df
| year | less_than_hs | high_school | some_college | bachelors_degree | advanced_degree | men_less_than_hs | men_high_school | men_some_college | men_bachelors_degree | ... | black_women_less_than_hs | black_women_high_school | black_women_some_college | black_women_bachelors_degree | black_women_advanced_degree | hispanic_women_less_than_hs | hispanic_women_high_school | hispanic_women_some_college | hispanic_women_bachelors_degree | hispanic_women_advanced_degree | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 16.52 | 21.94 | 24.81 | 41.60 | 53.22 | 17.99 | 24.08 | 27.96 | 49.01 | ... | 13.89 | 17.83 | 20.36 | 29.94 | 39.41 | 14.74 | 18.18 | 20.64 | 31.13 | 40.64 |
| 1 | 2021 | 16.74 | 22.28 | 24.92 | 41.32 | 53.45 | 18.34 | 24.36 | 27.96 | 47.83 | ... | 13.73 | 18.30 | 20.18 | 29.35 | 40.07 | 14.97 | 18.34 | 21.14 | 31.25 | 42.47 |
| 2 | 2020 | 17.02 | 22.70 | 25.44 | 41.65 | 53.74 | 18.76 | 25.09 | 28.55 | 48.15 | ... | 13.66 | 17.93 | 20.31 | 31.38 | 42.44 | 14.58 | 18.50 | 20.69 | 31.55 | 44.15 |
3 rows × 61 columns
filtered_df.plot(x='year', y=['less_than_hs','high_school','some_college','bachelors_degree','advanced_degree'], kind='bar', legend=True)
plt.title('Bar Chart Example')
plt.xlabel('Years')
plt.ylabel('Values')
plt.show()
from pandasql import sqldf
df = pd.read_csv('/Users/maha/Downloads/wages_by_education.csv')
pysqldf = sqldf("SELECT * FROM df", globals())
sqldf("SELECT * FROM df")
| year | less_than_hs | high_school | some_college | bachelors_degree | advanced_degree | men_less_than_hs | men_high_school | men_some_college | men_bachelors_degree | ... | black_women_less_than_hs | black_women_high_school | black_women_some_college | black_women_bachelors_degree | black_women_advanced_degree | hispanic_women_less_than_hs | hispanic_women_high_school | hispanic_women_some_college | hispanic_women_bachelors_degree | hispanic_women_advanced_degree | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 16.52 | 21.94 | 24.81 | 41.60 | 53.22 | 17.99 | 24.08 | 27.96 | 49.01 | ... | 13.89 | 17.83 | 20.36 | 29.94 | 39.41 | 14.74 | 18.18 | 20.64 | 31.13 | 40.64 |
| 1 | 2021 | 16.74 | 22.28 | 24.92 | 41.32 | 53.45 | 18.34 | 24.36 | 27.96 | 47.83 | ... | 13.73 | 18.30 | 20.18 | 29.35 | 40.07 | 14.97 | 18.34 | 21.14 | 31.25 | 42.47 |
| 2 | 2020 | 17.02 | 22.70 | 25.44 | 41.65 | 53.74 | 18.76 | 25.09 | 28.55 | 48.15 | ... | 13.66 | 17.93 | 20.31 | 31.38 | 42.44 | 14.58 | 18.50 | 20.69 | 31.55 | 44.15 |
| 3 | 2019 | 16.11 | 21.64 | 24.00 | 39.61 | 51.57 | 17.55 | 23.99 | 26.99 | 45.74 | ... | 13.30 | 17.18 | 19.17 | 30.31 | 40.42 | 14.50 | 17.71 | 19.69 | 30.18 | 42.30 |
| 4 | 2018 | 15.94 | 21.50 | 23.70 | 38.87 | 51.03 | 17.70 | 23.72 | 26.61 | 44.97 | ... | 12.48 | 16.81 | 18.57 | 30.85 | 39.64 | 13.47 | 17.28 | 19.29 | 29.47 | 39.35 |
| 5 | 2017 | 15.92 | 21.26 | 23.31 | 38.65 | 49.40 | 17.63 | 23.47 | 25.91 | 44.50 | ... | 12.99 | 16.33 | 18.76 | 29.19 | 38.26 | 13.36 | 17.02 | 19.60 | 29.69 | 38.43 |
| 6 | 2016 | 15.45 | 21.00 | 23.35 | 38.73 | 49.64 | 16.95 | 23.01 | 25.98 | 44.98 | ... | 12.42 | 16.71 | 18.88 | 29.56 | 38.55 | 13.40 | 17.11 | 19.26 | 29.41 | 37.77 |
| 7 | 2015 | 15.25 | 20.81 | 23.17 | 37.96 | 48.42 | 16.63 | 22.93 | 25.99 | 43.47 | ... | 12.79 | 16.21 | 18.50 | 30.42 | 37.37 | 13.42 | 16.99 | 18.66 | 29.71 | 36.37 |
| 8 | 2014 | 14.66 | 20.34 | 22.48 | 36.47 | 47.27 | 15.95 | 22.40 | 24.96 | 41.14 | ... | 12.47 | 16.09 | 18.39 | 30.20 | 37.88 | 12.61 | 16.55 | 18.22 | 28.53 | 37.16 |
| 9 | 2013 | 14.62 | 20.31 | 22.50 | 37.00 | 48.54 | 15.86 | 22.28 | 25.07 | 42.34 | ... | 12.41 | 16.60 | 18.32 | 29.57 | 38.40 | 12.80 | 16.23 | 18.70 | 28.20 | 36.92 |
| 10 | 2012 | 14.80 | 20.50 | 22.56 | 36.76 | 48.47 | 16.15 | 22.65 | 25.05 | 42.25 | ... | 12.20 | 16.52 | 18.66 | 29.43 | 37.46 | 12.61 | 16.41 | 18.78 | 27.58 | 38.91 |
| 11 | 2011 | 14.96 | 20.67 | 22.90 | 36.44 | 47.56 | 16.16 | 22.81 | 25.34 | 41.41 | ... | 13.40 | 16.56 | 18.80 | 30.49 | 38.01 | 13.06 | 16.56 | 19.44 | 28.19 | 39.59 |
| 12 | 2010 | 15.04 | 21.06 | 23.56 | 37.42 | 48.80 | 16.32 | 23.28 | 26.24 | 42.71 | ... | 13.63 | 17.33 | 19.61 | 30.53 | 37.74 | 12.89 | 16.85 | 19.65 | 28.39 | 41.27 |
| 13 | 2009 | 15.41 | 21.42 | 23.82 | 37.30 | 49.12 | 16.91 | 23.74 | 26.75 | 43.04 | ... | 13.23 | 17.26 | 19.88 | 29.59 | 39.37 | 13.24 | 17.43 | 19.63 | 29.44 | 41.34 |
| 14 | 2008 | 15.08 | 20.93 | 23.52 | 37.04 | 47.55 | 16.56 | 23.30 | 26.32 | 42.34 | ... | 12.78 | 16.69 | 19.55 | 30.50 | 38.24 | 13.02 | 16.70 | 19.86 | 28.68 | 37.34 |
| 15 | 2007 | 15.19 | 21.04 | 23.84 | 37.23 | 47.31 | 16.60 | 23.39 | 26.66 | 42.55 | ... | 13.08 | 17.22 | 20.07 | 29.77 | 38.75 | 12.76 | 16.91 | 19.94 | 29.81 | 40.13 |
| 16 | 2006 | 14.90 | 21.17 | 23.69 | 36.99 | 47.23 | 16.31 | 23.55 | 26.39 | 41.97 | ... | 13.25 | 17.02 | 19.94 | 30.53 | 38.06 | 12.68 | 16.90 | 19.62 | 28.80 | 38.93 |
| 17 | 2005 | 14.96 | 21.07 | 23.76 | 36.89 | 47.09 | 16.46 | 23.34 | 26.49 | 41.96 | ... | 12.90 | 17.07 | 19.69 | 29.25 | 38.33 | 12.64 | 16.96 | 19.73 | 28.61 | 38.19 |
| 18 | 2004 | 15.13 | 21.33 | 24.11 | 36.78 | 47.48 | 16.66 | 23.74 | 27.03 | 41.74 | ... | 12.81 | 17.51 | 20.01 | 30.28 | 38.36 | 12.69 | 17.24 | 20.08 | 29.68 | 37.50 |
| 19 | 2003 | 15.27 | 21.50 | 24.19 | 37.14 | 46.79 | 16.79 | 23.89 | 27.04 | 42.18 | ... | 13.13 | 17.59 | 20.45 | 31.02 | 37.78 | 13.04 | 17.38 | 20.13 | 28.66 | 35.91 |
| 20 | 2002 | 15.18 | 21.39 | 24.25 | 37.26 | 47.36 | 16.70 | 23.92 | 27.10 | 42.36 | ... | 12.84 | 17.41 | 20.15 | 30.11 | 38.80 | 12.80 | 16.66 | 20.03 | 28.39 | 37.85 |
| 21 | 2001 | 14.88 | 21.10 | 24.05 | 37.07 | 46.23 | 16.23 | 23.67 | 27.12 | 42.14 | ... | 13.04 | 16.97 | 19.70 | 30.11 | 37.22 | 12.83 | 16.88 | 19.71 | 28.71 | 36.79 |
| 22 | 2000 | 14.67 | 20.80 | 23.68 | 36.44 | 45.99 | 16.13 | 23.48 | 26.78 | 41.41 | ... | 12.95 | 16.81 | 19.59 | 30.18 | 36.87 | 12.29 | 16.53 | 19.38 | 28.28 | 35.66 |
| 23 | 1999 | 14.61 | 20.67 | 23.36 | 35.76 | 46.02 | 16.16 | 23.31 | 26.34 | 40.71 | ... | 12.72 | 16.42 | 19.21 | 29.54 | 37.04 | 12.16 | 16.38 | 19.03 | 28.09 | 35.95 |
| 24 | 1998 | 14.57 | 20.39 | 23.02 | 34.97 | 44.49 | 16.04 | 22.99 | 25.86 | 39.63 | ... | 12.57 | 16.25 | 19.29 | 28.68 | 36.92 | 12.10 | 16.27 | 19.21 | 27.55 | 34.36 |
| 25 | 1997 | 14.07 | 19.95 | 22.54 | 33.44 | 43.68 | 15.44 | 22.58 | 25.33 | 37.84 | ... | 12.25 | 15.83 | 18.78 | 26.60 | 33.90 | 11.83 | 15.66 | 18.37 | 27.23 | 34.91 |
| 26 | 1996 | 13.95 | 19.62 | 22.11 | 32.50 | 43.35 | 15.45 | 22.11 | 24.79 | 36.80 | ... | 11.77 | 15.70 | 17.92 | 26.03 | 35.73 | 11.77 | 15.53 | 18.28 | 26.75 | 38.63 |
| 27 | 1995 | 13.95 | 19.68 | 22.04 | 32.86 | 43.27 | 15.39 | 22.20 | 24.78 | 37.07 | ... | 12.07 | 15.53 | 18.59 | 27.99 | 37.90 | 11.35 | 15.91 | 18.17 | 26.66 | 36.22 |
| 28 | 1994 | 14.33 | 19.88 | 22.26 | 32.72 | 43.46 | 15.72 | 22.39 | 24.89 | 36.89 | ... | 12.45 | 15.79 | 18.90 | 27.01 | 37.24 | 12.07 | 16.47 | 18.86 | 26.70 | 36.96 |
| 29 | 1993 | 14.60 | 19.65 | 22.27 | 32.16 | 41.45 | 16.09 | 22.18 | 24.84 | 36.37 | ... | 12.73 | 15.84 | 18.70 | 27.45 | 35.67 | 11.94 | 16.25 | 18.79 | 25.57 | 36.49 |
| 30 | 1992 | 14.87 | 19.65 | 22.26 | 32.16 | 40.94 | 16.42 | 22.32 | 24.95 | 36.38 | ... | 13.21 | 15.73 | 18.98 | 27.01 | 34.10 | 12.18 | 15.79 | 18.89 | 26.66 | 34.62 |
| 31 | 1991 | 14.99 | 19.77 | 22.49 | 31.87 | 40.71 | 16.66 | 22.49 | 25.46 | 36.36 | ... | 12.61 | 15.84 | 18.91 | 26.81 | 32.58 | 12.19 | 16.11 | 18.51 | 25.40 | 33.75 |
| 32 | 1990 | 15.05 | 19.74 | 22.58 | 32.21 | 39.95 | 16.90 | 22.69 | 25.75 | 36.77 | ... | 12.20 | 15.63 | 18.77 | 25.51 | 32.97 | 12.07 | 15.93 | 18.33 | 26.63 | 32.33 |
| 33 | 1989 | 15.46 | 20.07 | 22.61 | 31.89 | 39.80 | 17.63 | 23.31 | 25.78 | 36.60 | ... | 12.14 | 15.69 | 19.00 | 25.97 | 34.39 | 12.13 | 15.95 | 19.17 | 25.12 | 32.88 |
| 34 | 1988 | 15.79 | 20.66 | 23.03 | 33.41 | 41.12 | 18.02 | 24.18 | 26.71 | 39.34 | ... | 12.32 | 16.17 | 18.67 | 26.04 | 35.80 | 12.27 | 15.77 | 19.43 | 24.59 | 31.65 |
| 35 | 1987 | 15.76 | 20.60 | 23.17 | 33.19 | 40.55 | 18.05 | 24.17 | 27.02 | 39.07 | ... | 12.60 | 16.03 | 18.76 | 25.21 | 33.45 | 12.31 | 15.97 | 18.92 | 25.01 | 32.33 |
| 36 | 1986 | 16.10 | 20.61 | 23.12 | 32.67 | 39.88 | 18.44 | 24.24 | 27.04 | 38.52 | ... | 12.78 | 16.09 | 18.48 | 24.60 | 33.04 | 12.18 | 16.35 | 18.68 | 24.92 | 28.90 |
| 37 | 1985 | 16.08 | 20.40 | 22.60 | 31.50 | 38.31 | 18.46 | 24.14 | 26.25 | 37.00 | ... | 12.45 | 15.87 | 18.30 | 25.15 | 31.28 | 12.40 | 15.89 | 18.38 | 24.12 | 29.40 |
| 38 | 1984 | 16.21 | 20.30 | 22.37 | 31.07 | 37.45 | 18.66 | 24.06 | 25.97 | 36.63 | ... | 12.39 | 15.65 | 18.04 | 23.66 | 31.81 | 12.39 | 16.25 | 17.79 | 22.23 | 30.82 |
| 39 | 1983 | 16.43 | 20.43 | 22.28 | 30.73 | 36.78 | 18.89 | 24.33 | 26.04 | 36.21 | ... | 12.92 | 15.94 | 17.71 | 23.73 | 30.64 | 12.35 | 16.02 | 18.94 | 23.17 | 26.31 |
| 40 | 1982 | 16.73 | 20.65 | 22.43 | 30.55 | 36.08 | 19.34 | 24.64 | 26.22 | 35.80 | ... | 12.81 | 15.95 | 17.88 | 23.54 | 30.65 | 12.67 | 16.48 | 17.68 | 23.98 | 30.21 |
| 41 | 1981 | 17.07 | 20.68 | 22.47 | 30.17 | 35.32 | 19.64 | 24.84 | 26.24 | 35.41 | ... | 12.88 | 15.86 | 18.14 | 23.82 | 31.05 | 13.02 | 16.13 | 17.49 | 21.29 | 31.21 |
| 42 | 1980 | 17.42 | 20.90 | 22.73 | 30.04 | 35.53 | 20.08 | 25.18 | 26.63 | 35.16 | ... | 12.98 | 16.09 | 18.28 | 23.99 | 31.58 | 12.87 | 16.16 | 17.86 | 22.60 | 28.23 |
| 43 | 1979 | 17.95 | 21.62 | 23.20 | 30.57 | 36.19 | 20.74 | 26.04 | 27.18 | 35.72 | ... | 13.33 | 16.30 | 18.54 | 23.84 | 29.34 | 13.20 | 16.27 | 18.02 | 21.94 | 30.87 |
| 44 | 1978 | 17.29 | 21.52 | 23.25 | 30.90 | 37.56 | 20.10 | 25.97 | 27.14 | 35.92 | ... | 12.67 | 17.00 | 18.24 | 25.47 | 34.08 | 13.08 | 15.72 | 17.56 | 21.26 | 28.97 |
| 45 | 1977 | 17.59 | 21.50 | 22.97 | 31.07 | 37.36 | 20.43 | 25.97 | 26.70 | 36.09 | ... | 12.80 | 16.30 | 18.87 | 25.83 | 37.10 | 12.66 | 17.22 | 17.12 | 21.49 | 30.31 |
| 46 | 1976 | 17.52 | 21.76 | 23.49 | 31.46 | 37.50 | 20.36 | 26.14 | 27.10 | 36.42 | ... | 12.75 | 16.40 | 19.11 | 26.10 | 34.92 | 13.79 | 17.29 | 17.40 | 20.29 | 20.93 |
| 47 | 1975 | 17.30 | 21.55 | 23.30 | 31.45 | 38.41 | 20.00 | 26.02 | 26.93 | 36.21 | ... | 12.48 | 15.88 | 18.11 | 27.93 | 33.03 | 12.87 | 16.04 | 17.04 | 18.85 | 23.63 |
| 48 | 1974 | 17.68 | 21.60 | 23.32 | 31.69 | 38.37 | 20.63 | 26.15 | 26.79 | 36.62 | ... | 12.31 | 15.55 | 18.19 | 27.80 | 33.86 | 13.24 | 16.29 | 18.17 | 21.70 | 26.74 |
| 49 | 1973 | 18.06 | 22.22 | 24.08 | 32.80 | 38.16 | 21.18 | 26.90 | 27.67 | 37.69 | ... | 12.02 | 15.79 | 18.11 | 28.35 | 41.81 | 12.92 | 16.46 | 20.22 | 22.23 | 25.10 |
50 rows × 61 columns
comparison_result = (sqldf("SELECT year, AVG(men_high_school) AS men_high_school,AVG(women_high_school) AS women_high_school,AVG(men_advanced_degree) AS men_advanced_degree, AVG(women_advanced_degree) AS women_advanced_degree FROM df GROUP BY year"))
print("Overview of Data Manipulation:")
plt.figure(figsize=(12, 8))
plt.plot(comparison_result['year'], comparison_result['men_high_school'], label='Men - High School')
plt.plot(comparison_result['year'], comparison_result['men_advanced_degree'], label='Men - Advanced Degree')
plt.plot(comparison_result['year'], comparison_result['women_high_school'], label='Women - High School')
plt.plot(comparison_result['year'], comparison_result['women_advanced_degree'], label='Women - Advanced Degree')
plt.title('Comparison of Average Wages for High School-educated Men and Women Over the Years')
plt.xlabel('Year')
plt.ylabel('Average Wages')
plt.legend()
plt.show()
Overview of Data Manipulation: